From 78938b42ad38d4c475e1a041914ef5e4518c0d39 Mon Sep 17 00:00:00 2001 From: Tim Deegan Date: Thu, 8 Mar 2007 16:38:15 +0000 Subject: [PATCH] [HVM] Save/restore: make sure all ioreqs are gone when a domain is saved by pausing the domain and pulling them through. Signed-off-by: Tim Deegan --- tools/ioemu/hw/ide.c | 57 ++++++++++++++++++++++++---- tools/ioemu/target-i386-dm/helper2.c | 21 ++++++++++ tools/ioemu/vl.h | 1 + tools/libxc/xc_hvm_restore.c | 8 ++++ tools/libxc/xc_hvm_save.c | 23 ++++++++++- tools/libxc/xenguest.h | 2 + tools/libxc/xg_private.c | 5 +++ xen/arch/x86/hvm/hvm.c | 48 +++++++++++++++++++++++ xen/include/public/hvm/hvm_op.h | 8 ++++ 9 files changed, 163 insertions(+), 10 deletions(-) diff --git a/tools/ioemu/hw/ide.c b/tools/ioemu/hw/ide.c index c4db9fe738..274dd8f850 100644 --- a/tools/ioemu/hw/ide.c +++ b/tools/ioemu/hw/ide.c @@ -396,17 +396,41 @@ typedef struct PCIIDEState { #ifdef DMA_MULTI_THREAD +static pthread_t ide_dma_thread; static int file_pipes[2]; static void ide_dma_loop(BMDMAState *bm); static void dma_thread_loop(BMDMAState *bm); +extern int suspend_requested; static void *dma_thread_func(void* opaque) { BMDMAState* req; - - while (read(file_pipes[0], &req, sizeof(req))) { - dma_thread_loop(req); + fd_set fds; + int rv, nfds = file_pipes[0] + 1; + struct timeval tm; + + while (1) { + + /* Wait at most a second for the pipe to become readable */ + FD_ZERO(&fds); + FD_SET(file_pipes[0], &fds); + tm.tv_sec = 1; + tm.tv_usec = 0; + rv = select(nfds, &fds, NULL, NULL, &tm); + + if (rv != 0) { + if (read(file_pipes[0], &req, sizeof(req)) == 0) + return NULL; + dma_thread_loop(req); + } else { + if (suspend_requested) { + /* Need to tidy up the DMA thread so that we don't end up + * finishing operations after the domain's ioreqs are + * drained and its state saved */ + return NULL; + } + } } return NULL; @@ -414,24 +438,41 @@ static void *dma_thread_func(void* opaque) static void dma_create_thread(void) { - pthread_t tid; int rt; + pthread_attr_t a; if (pipe(file_pipes) != 0) { fprintf(stderr, "create pipe failed\n"); exit(1); } - if ((rt = pthread_create(&tid, NULL, dma_thread_func, NULL))) { + if ((rt = pthread_attr_init(&a)) + || (rt = pthread_attr_setdetachstate(&a, PTHREAD_CREATE_JOINABLE))) { + fprintf(stderr, "Oops, dma thread attr setup failed, errno=%d\n", rt); + exit(1); + } + + if ((rt = pthread_create(&ide_dma_thread, &a, dma_thread_func, NULL))) { fprintf(stderr, "Oops, dma thread creation failed, errno=%d\n", rt); exit(1); } +} - if ((rt = pthread_detach(tid))) { - fprintf(stderr, "Oops, dma thread detachment failed, errno=%d\n", rt); - exit(1); +void ide_stop_dma_thread(void) +{ + int rc; + /* Make sure the IDE DMA thread is stopped */ + if ( (rc = pthread_join(ide_dma_thread, NULL)) != 0 ) + { + fprintf(stderr, "Oops, error collecting IDE DMA thread (%s)\n", + strerror(rc)); } } + +#else +void ide_stop_dma_thread(void) +{ +} #endif /* DMA_MULTI_THREAD */ #if defined(__ia64__) diff --git a/tools/ioemu/target-i386-dm/helper2.c b/tools/ioemu/target-i386-dm/helper2.c index dc3394d5b7..7a5f3412c9 100644 --- a/tools/ioemu/target-i386-dm/helper2.c +++ b/tools/ioemu/target-i386-dm/helper2.c @@ -577,7 +577,28 @@ int main_loop(void) destroy_hvm_domain(); else { char qemu_file[20]; + ioreq_t *req; + int rc; + sprintf(qemu_file, "/tmp/xen.qemu-dm.%d", domid); + xc_domain_pause(xc_handle, domid); + + /* Pull all outstanding ioreqs through the system */ + handle_buffered_io(env); + main_loop_wait(1); /* For the select() on events */ + + /* Stop the IDE thread */ + ide_stop_dma_thread(); + + /* Make sure that all outstanding IO responses are handled too */ + if ( xc_hvm_drain_io(xc_handle, domid) != 0 ) + { + fprintf(stderr, "error clearing ioreq rings (%s)\n", + strerror(errno)); + return -1; + } + + /* Save the device state */ if (qemu_savevm(qemu_file) < 0) fprintf(stderr, "qemu save fail.\n"); } diff --git a/tools/ioemu/vl.h b/tools/ioemu/vl.h index 8535ba689c..a6b53aa312 100644 --- a/tools/ioemu/vl.h +++ b/tools/ioemu/vl.h @@ -843,6 +843,7 @@ void pci_cmd646_ide_init(PCIBus *bus, BlockDriverState **hd_table, void pci_piix3_ide_init(PCIBus *bus, BlockDriverState **hd_table, int devfn); int pmac_ide_init (BlockDriverState **hd_table, SetIRQFunc *set_irq, void *irq_opaque, int irq); +void ide_stop_dma_thread(void); /* cdrom.c */ int cdrom_read_toc(int nb_sectors, uint8_t *buf, int msf, int start_track); diff --git a/tools/libxc/xc_hvm_restore.c b/tools/libxc/xc_hvm_restore.c index 001fe2a6e4..a5e51e84e8 100644 --- a/tools/libxc/xc_hvm_restore.c +++ b/tools/libxc/xc_hvm_restore.c @@ -281,6 +281,14 @@ int xc_hvm_restore(int xc_handle, int io_fd, else shared_page_nr = (v_end >> PAGE_SHIFT) - 1; + /* Paranoia: clean pages. */ + if ( xc_clear_domain_page(xc_handle, dom, shared_page_nr) || + xc_clear_domain_page(xc_handle, dom, shared_page_nr-1) || + xc_clear_domain_page(xc_handle, dom, shared_page_nr-2) ) { + ERROR("error clearing comms frames!\n"); + goto out; + } + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, shared_page_nr-1); xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, shared_page_nr-2); xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, shared_page_nr); diff --git a/tools/libxc/xc_hvm_save.c b/tools/libxc/xc_hvm_save.c index d1b44bd1c6..c422e296e6 100644 --- a/tools/libxc/xc_hvm_save.c +++ b/tools/libxc/xc_hvm_save.c @@ -27,12 +27,14 @@ #include #include #include -#include #include "xc_private.h" #include "xg_private.h" #include "xg_save_restore.h" +#include +#include + /* ** Default values for important tuning parameters. Can override by passing ** non-zero replacement values to xc_hvm_save(). @@ -49,12 +51,29 @@ static unsigned long max_mfn; /* virtual starting address of the hypervisor */ static unsigned long hvirt_start; -/* #levels of page tables used by the currrent guest */ +/* #levels of page tables used by the current guest */ static unsigned int pt_levels; /* total number of pages used by the current guest */ static unsigned long max_pfn; +int xc_hvm_drain_io(int handle, domid_t dom) +{ + DECLARE_HYPERCALL; + xen_hvm_drain_io_t arg; + int rc; + + hypercall.op = __HYPERVISOR_hvm_op; + hypercall.arg[0] = HVMOP_drain_io; + hypercall.arg[1] = (unsigned long)&arg; + arg.domid = dom; + if ( lock_pages(&arg, sizeof(arg)) != 0 ) + return -1; + rc = do_xen_hypercall(handle, &hypercall); + unlock_pages(&arg, sizeof(arg)); + return rc; +} + /* ** During (live) save/migrate, we maintain a number of bitmaps to track ** which pages we have to send, to fixup, and to skip. diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h index cb3c6a4d1c..0f77217978 100644 --- a/tools/libxc/xenguest.h +++ b/tools/libxc/xenguest.h @@ -155,6 +155,8 @@ int xc_set_hvm_param( int xc_get_hvm_param( int handle, domid_t dom, int param, unsigned long *value); +int xc_hvm_drain_io(int handle, domid_t dom); + /* PowerPC specific. */ int xc_prose_build(int xc_handle, uint32_t domid, diff --git a/tools/libxc/xg_private.c b/tools/libxc/xg_private.c index 63ffea4aad..2332ef4c6c 100644 --- a/tools/libxc/xg_private.c +++ b/tools/libxc/xg_private.c @@ -229,6 +229,11 @@ __attribute__((weak)) int xc_set_hvm_param( return -ENOSYS; } +__attribute__((weak)) int xc_hvm_drain_io(int handle, domid_t dom) +{ + return -ENOSYS; +} + /* * Local variables: * mode: C diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index d03824bc0e..09515203de 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -146,6 +146,48 @@ void hvm_do_resume(struct vcpu *v) } } +/* Called from the tools when saving a domain to make sure the io + * request-response ring is entirely empty. */ +static int hvmop_drain_io( + XEN_GUEST_HANDLE(xen_hvm_drain_io_t) uop) +{ + struct xen_hvm_drain_io op; + struct domain *d; + struct vcpu *v; + ioreq_t *p; + int rc; + + if ( copy_from_guest(&op, uop, 1) ) + return -EFAULT; + + if ( !IS_PRIV(current->domain) ) + return -EPERM; + + d = rcu_lock_domain_by_id(op.domid); + if ( d == NULL ) + return -ESRCH; + + rc = -EINVAL; + /* Can't do this to yourself, or to a domain without an ioreq ring */ + if ( d == current->domain || !is_hvm_domain(d) || get_sp(d) == NULL ) + goto out; + + rc = 0; + + domain_pause(d); /* It's not safe to do this to running vcpus */ + for_each_vcpu(d, v) + { + p = &get_vio(v->domain, v->vcpu_id)->vp_ioreq; + if ( p->state == STATE_IORESP_READY ) + hvm_io_assist(v); + } + domain_unpause(d); + + out: + rcu_unlock_domain(d); + return rc; +} + int hvm_domain_initialise(struct domain *d) { int rc; @@ -916,6 +958,12 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg) guest_handle_cast(arg, xen_hvm_set_pci_link_route_t)); break; + case HVMOP_drain_io: + rc = hvmop_drain_io( + guest_handle_cast(arg, xen_hvm_drain_io_t)); + break; + + default: { gdprintk(XENLOG_WARNING, "Bad HVM op %ld.\n", op); diff --git a/xen/include/public/hvm/hvm_op.h b/xen/include/public/hvm/hvm_op.h index f568050949..f03ad288bb 100644 --- a/xen/include/public/hvm/hvm_op.h +++ b/xen/include/public/hvm/hvm_op.h @@ -70,4 +70,12 @@ struct xen_hvm_set_pci_link_route { typedef struct xen_hvm_set_pci_link_route xen_hvm_set_pci_link_route_t; DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_link_route_t); +/* Drain all outstanding qemu-dm IO responses from a domain's ioreq ring. */ +#define HVMOP_drain_io 5 +struct xen_hvm_drain_io { + domid_t domid; +}; +typedef struct xen_hvm_drain_io xen_hvm_drain_io_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_drain_io_t); + #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */ -- 2.30.2